You need to install several R packages from the
BupaRverse.
install.packages("bupaR")
install.packages("edeaR")
install.packages("eventdataR")
install.packages("processmapR")
install.packages("processanimateR")
install.packages("petrinetR")
library(bupaR)
## Warning: package 'bupaR' was built under R version 4.1.2
##
## Attaching package: 'bupaR'
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:utils':
##
## timestamp
bupaR automatically loads eventdataR
#library(eventdataR)
When loading the bupaR package, the traffic_fines
dataset is also loaded into memory.
traffic_fines
## # A tibble: 34,724 × 18
## case_id activity lifecycle resource timestamp amount article
## <chr> <fct> <fct> <fct> <dttm> <dbl> <int>
## 1 A1 Create Fine complete 561 2006-07-24 00:00:00 350 157
## 2 A1 Send Fine complete <NA> 2006-12-05 00:00:00 NA NA
## 3 A100 Create Fine complete 561 2006-08-02 00:00:00 350 157
## 4 A100 Send Fine complete <NA> 2006-12-12 00:00:00 NA NA
## 5 A100 Insert Fine No… complete <NA> 2007-01-15 00:00:00 NA NA
## 6 A100 Add penalty complete <NA> 2007-03-16 00:00:00 715 NA
## 7 A100 Send for Credi… complete <NA> 2009-03-30 00:00:00 NA NA
## 8 A10000 Create Fine complete 561 2007-03-09 00:00:00 360 157
## 9 A10000 Send Fine complete <NA> 2007-07-17 00:00:00 NA NA
## 10 A10000 Insert Fine No… complete <NA> 2007-08-02 00:00:00 NA NA
## # … with 34,714 more rows, and 11 more variables: dismissal <chr>,
## # expense <dbl>, lastsent <chr>, matricola <chr>, notificationtype <chr>,
## # paymentamount <dbl>, points <int>, totalpaymentamount <chr>,
## # vehicleclass <chr>, activity_instance_id <chr>, .order <int>
We have to turn the traffic_fines dataset into an
eventlog/log class using the simple_eventlog function.
log_traffic_fines <- simple_eventlog(traffic_fines)
## Warning: `simple_eventlog()` was deprecated in bupaR 5.0.0.
## Please use `activitylog()` instead.
For this dataset, you can also just add the “log” class to the
R object.
log_traffic_fines <- traffic_fines
class(log_traffic_fines) <- c("log",class(log_traffic_fines))
From the eventdataR package, retrieve the datasets: sepsis, hospital_billing, hospital, patients.
case_list(log_traffic_fines)
## # A tibble: 10,000 × 3
## case_id trace trace_id
## <chr> <chr> <dbl>
## 1 A2127 Create Fine,Payment 1
## 2 A15 Create Fine,Send Fine,Insert Fine Notification,Add penalty,… 25
## 3 A1820 Create Fine,Payment 1
## 4 A22 Create Fine,Send Fine,Insert Fine Notification,Add penalty,… 19
## 5 A1451 Create Fine,Send Fine,Insert Fine Notification,Add penalty,… 25
## 6 A23 Create Fine,Send Fine,Insert Fine Notification,Add penalty,… 20
## 7 A24 Create Fine,Send Fine 7
## 8 A25 Create Fine,Send Fine,Insert Fine Notification,Add penalty,… 25
## 9 A1452 Create Fine,Send Fine,Insert Fine Notification,Add penalty,… 25
## 10 A1453 Create Fine,Send Fine,Insert Fine Notification,Add penalty,… 25
## # … with 9,990 more rows
A more detailed view on lists:
cases(log_traffic_fines)
## # A tibble: 10,000 × 10
## case_id trace_length number_of_activ… start_timestamp complete_timestamp
## <chr> <int> <int> <dttm> <dttm>
## 1 A1 2 2 2006-07-24 00:00:00 2006-12-05 00:00:00
## 2 A100 5 5 2006-08-02 00:00:00 2009-03-30 00:00:00
## 3 A10000 5 5 2007-03-09 00:00:00 2008-09-09 00:00:00
## 4 A10001 6 6 2007-03-19 00:00:00 2007-09-24 00:00:00
## 5 A10004 5 5 2007-03-20 00:00:00 2009-03-30 00:00:00
## 6 A10005 2 2 2007-03-20 00:00:00 2007-03-21 00:00:00
## 7 A10007 2 2 2007-03-20 00:00:00 2007-03-23 00:00:00
## 8 A10008 5 5 2007-03-20 00:00:00 2009-03-30 00:00:00
## 9 A10009 6 5 2007-03-20 00:00:00 2007-10-31 00:00:00
## 10 A1001 5 5 2006-08-02 00:00:00 2009-03-30 00:00:00
## # … with 9,990 more rows, and 5 more variables: trace <chr>, trace_id <dbl>,
## # duration <drtn>, first_activity <fct>, last_activity <fct>
case_length_dist_tf <- sapply(strsplit(case_list(log_traffic_fines)$trace, ","), length)
table(case_length_dist_tf)
## case_length_dist_tf
## 2 3 4 5 6 7 8 9
## 5318 42 5 4031 542 10 3 49
barplot(table(case_length_dist_tf))
quantile(case_length_dist_tf,probs = seq(0, 1, 0.1))
## 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100%
## 2 2 2 2 2 2 5 5 5 5 9
log_traffic_fines %>%
durations(units="hours") %>% # duration of the cases
as.data.frame(units="hours") %>% # cases and their duration
ggplot(aes(x=duration))+
geom_density(color="#1F77b4",
fill="#17BECF",
alpha=.5)+
theme_minimal()
## Don't know how to automatically pick scale for object of type difftime. Defaulting to continuous.
trace_list(log_traffic_fines)
## # A tibble: 44 × 3
## trace absolute_freque… relative_freque…
## <chr> <int> <dbl>
## 1 Create Fine,Payment 3428 0.343
## 2 Create Fine,Send Fine,Insert Fine Notifica… 3273 0.327
## 3 Create Fine,Send Fine 1890 0.189
## 4 Create Fine,Send Fine,Insert Fine Notifica… 758 0.0758
## 5 Create Fine,Send Fine,Insert Fine Notifica… 250 0.025
## 6 Create Fine,Send Fine,Insert Fine Notifica… 151 0.0151
## 7 Create Fine,Send Fine,Insert Fine Notifica… 78 0.0078
## 8 Create Fine,Send Fine,Payment 37 0.0037
## 9 Create Fine,Send Fine,Insert Fine Notifica… 24 0.0024
## 10 Create Fine,Send Fine,Insert Fine Notifica… 13 0.0013
## # … with 34 more rows
sepsis | hospital_billing | hospital | patients
For instance, here are the results for the sepsis log.
## # A tibble: 15,214 × 34
## case_id activity lifecycle resource timestamp age crp diagnose
## <chr> <fct> <fct> <fct> <dttm> <int> <dbl> <chr>
## 1 A ER Regis… complete A 2014-10-22 11:15:41 85 NA A
## 2 A Leucocyt… complete B 2014-10-22 11:27:00 NA NA <NA>
## 3 A CRP complete B 2014-10-22 11:27:00 NA 210 <NA>
## 4 A LacticAc… complete B 2014-10-22 11:27:00 NA NA <NA>
## 5 A ER Triage complete C 2014-10-22 11:33:37 NA NA <NA>
## 6 A ER Sepsi… complete A 2014-10-22 11:34:00 NA NA <NA>
## 7 A IV Liquid complete A 2014-10-22 14:03:47 NA NA <NA>
## 8 A IV Antib… complete A 2014-10-22 14:03:47 NA NA <NA>
## 9 A Admissio… complete D 2014-10-22 14:13:19 NA NA <NA>
## 10 A CRP complete B 2014-10-24 09:00:00 NA 1090 <NA>
## # … with 15,204 more rows, and 26 more variables: diagnosticartastrup <chr>,
## # diagnosticblood <chr>, diagnosticecg <chr>, diagnosticic <chr>,
## # diagnosticlacticacid <chr>, diagnosticliquor <chr>, diagnosticother <chr>,
## # diagnosticsputum <chr>, diagnosticurinaryculture <chr>,
## # diagnosticurinarysediment <chr>, diagnosticxthorax <chr>, disfuncorg <chr>,
## # hypotensie <chr>, hypoxie <chr>, infectionsuspected <chr>, infusion <chr>,
## # lacticacid <chr>, leucocytes <chr>, oligurie <chr>, …
## # A tibble: 1,050 × 3
## case_id trace trace_id
## <chr> <chr> <dbl>
## 1 XJ ER Registration,ER Triage,ER Sepsis Triage,LacticAcid,Leuco… 496
## 2 I ER Registration,ER Triage,ER Sepsis Triage,IV Antibiotics,L… 217
## 3 WEA ER Registration,ER Triage,ER Sepsis Triage,CRP,Leucocytes,I… 147
## 4 OT ER Registration,ER Triage,ER Sepsis Triage,LacticAcid,Leuco… 470
## 5 ULA ER Registration,IV Liquid,ER Triage,Leucocytes,CRP,LacticAc… 772
## 6 BV ER Registration,ER Triage,ER Sepsis Triage,CRP,LacticAcid,L… 110
## 7 MY ER Registration,ER Triage,ER Sepsis Triage,CRP,LacticAcid,L… 74
## 8 ZS ER Registration,ER Triage,ER Sepsis Triage,Leucocytes,CRP,L… 625
## 9 SGA ER Registration,ER Triage,CRP,LacticAcid,Leucocytes,ER Seps… 37
## 10 QKA ER Registration,ER Triage,ER Sepsis Triage,Leucocytes,CRP,L… 623
## # … with 1,040 more rows
## # A tibble: 1,050 × 10
## case_id trace_length number_of_activ… start_timestamp complete_timestamp
## <chr> <int> <int> <dttm> <dttm>
## 1 <NA> 24 10 2014-11-10 01:45:19 2014-11-22 14:30:00
## 2 A 22 10 2014-10-22 11:15:41 2014-11-02 15:15:00
## 3 AA 8 8 2014-12-03 09:06:44 2014-12-03 14:28:01
## 4 AAA 11 11 2014-11-19 03:16:21 2014-11-28 16:15:17
## 5 AB 8 8 2014-02-16 09:55:43 2014-02-16 13:52:06
## 6 ABA 17 10 2014-10-12 11:22:24 2014-10-18 16:15:00
## 7 AC 13 11 2014-09-24 15:39:13 2014-12-02 18:47:17
## 8 ACA 8 8 2014-09-22 10:00:00 2014-09-22 10:45:56
## 9 AD 29 12 2013-12-19 09:04:38 2014-05-04 18:34:00
## 10 ADA 24 10 2014-06-21 07:14:16 2014-07-06 20:58:00
## # … with 1,040 more rows, and 5 more variables: trace <chr>, trace_id <dbl>,
## # duration <drtn>, first_activity <fct>, last_activity <fct>
## case_length_dist_se
## 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22
## 35 1 61 32 27 100 41 50 50 81 95 79 74 69 42 31 21 19 21 15
## 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 43
## 15 8 6 3 6 4 9 8 5 4 5 2 2 3 3 1 2 2 2 1
## 45 48 51 52 59 61 66 84 88 118 170 185
## 2 1 1 2 1 2 1 1 1 1 1 1
## 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100%
## 3 6 8 10 12 13 14 16 18 23 185
## Don't know how to automatically pick scale for object of type difftime. Defaulting to continuous.
The print function displays a summary of the log. The
same result is obtained by just typing the log’s object name in the
R console.
print(log_traffic_fines)
## # Log of 34724 events consisting of:
## 44 traces
## 10000 cases
## 34724 instances of 11 activities
## 1 resource
## Events occurred from 2006-06-17 until 2012-03-26
##
## # Variables were mapped as follows:
## Case identifier: case_id
## Activity identifier: activity
## Resource identifier: resource_id
## Activity instance identifier: activity_instance_id
## Timestamp: timestamp
## Lifecycle transition: lifecycle_id
##
## # A tibble: 34,724 × 20
## case_id activity lifecycle resource timestamp amount article
## <chr> <fct> <fct> <fct> <dttm> <dbl> <int>
## 1 A1 Create Fine complete 561 2006-07-24 00:00:00 350 157
## 2 A1 Send Fine complete <NA> 2006-12-05 00:00:00 NA NA
## 3 A100 Create Fine complete 561 2006-08-02 00:00:00 350 157
## 4 A100 Send Fine complete <NA> 2006-12-12 00:00:00 NA NA
## 5 A100 Insert Fine No… complete <NA> 2007-01-15 00:00:00 NA NA
## 6 A100 Add penalty complete <NA> 2007-03-16 00:00:00 715 NA
## 7 A100 Send for Credi… complete <NA> 2009-03-30 00:00:00 NA NA
## 8 A10000 Create Fine complete 561 2007-03-09 00:00:00 360 157
## 9 A10000 Send Fine complete <NA> 2007-07-17 00:00:00 NA NA
## 10 A10000 Insert Fine No… complete <NA> 2007-08-02 00:00:00 NA NA
## # … with 34,714 more rows, and 13 more variables: dismissal <chr>,
## # expense <dbl>, lastsent <chr>, matricola <chr>, notificationtype <chr>,
## # paymentamount <dbl>, points <int>, totalpaymentamount <chr>,
## # vehicleclass <chr>, activity_instance_id <int>, .order <int>,
## # resource_id <chr>, lifecycle_id <chr>
To get the number of events:
n_events(log_traffic_fines)
## [1] 34724
To get the number of cases:
n_cases(log_traffic_fines)
## [1] 10000
To get the number of traces:
n_traces(log_traffic_fines)
## [1] 44
To get the number of activities:
n_activities(log_traffic_fines)
## [1] 11
Recall the formula for Average case length:
\[\displaystyle\frac{Events}{Cases}=\frac{\mathrm{n\_events(log\_traffic\_fines)}}{\mathrm{n\_cases(log\_traffic\_fines)}}\]
n_events(log_traffic_fines)/n_cases(log_traffic_fines)
## [1] 3.4724
\(\displaystyle\frac{Events}{Cases}=3.4724\)
Recall the formula for Process uniformity:
\[\displaystyle 1-\frac{Traces-1}{Cases}=1-\frac{\mathrm{n\_traces(log\_traffic\_fines)-1}}{\mathrm{n\_cases(log\_traffic\_fines)}}\]
1-(n_traces(log_traffic_fines)-1)/n_cases(log_traffic_fines)
## [1] 0.9957
\(\displaystyle 1-\frac{Traces-1}{Cases}=0.9957\)
sepsis | hospital_billing | hospital | patients
Compute and display the Precedence Matrix for absolute frequencies.
process_matrix(log_traffic_fines)
## # A tibble: 47 × 3
## antecedent consequent n
## <fct> <fct> <dbl>
## 1 Add penalty Insert Date Appeal to Prefecture 41
## 2 Add penalty Notify Result Appeal to Offender 3
## 3 Add penalty Payment 1117
## 4 Add penalty Receive Result Appeal from Prefecture 15
## 5 Add penalty Send Appeal to Prefecture 171
## 6 Add penalty Send for Credit Collection 3288
## 7 Appeal to Judge Add penalty 13
## 8 Appeal to Judge End 5
## 9 Appeal to Judge Insert Date Appeal to Prefecture 1
## 10 Create Fine Payment 3443
## # … with 37 more rows
plot(process_matrix(log_traffic_fines))
It is a ggplot object that can be customized.
plot(process_matrix(log_traffic_fines))+
geom_tile(aes(fill=n))+
geom_text(aes(label=n),color="white",size=2.5)+
ggtitle("Traffic fines process map")+
scale_fill_viridis_c(option="turbo")+
theme_light()+
theme(axis.text.x=element_text(angle=45,
hjust=1,
size=8),
axis.text.y=element_text(size=8))
## Scale for 'fill' is already present. Adding another scale for 'fill', which
## will replace the existing scale.
In addition to the absolute frequencies, other quantities can be computed by changing the value argument of frequency():
absolute, relative (percentage of activity instances) or relative_case (percentage of cases the activity occurs in).
plot(process_matrix(log_traffic_fines,type=frequency(value="relative")))+geom_tile(aes(fill = rel_n))+geom_text(aes(label=round(rel_n * 100, 2)),color="white",size=2.5)
plot(process_matrix(log_traffic_fines,type=frequency(value="absolute-case")))+geom_tile(aes(fill = n_cases))+geom_text(aes(label=n_cases),color="white",size=2.5)
sepsis | hospital_billing | hospital | patients
The coverage argument is the percentage coverage of the traces to explore. The default is the 20% most (in)frequent traces.
trace_explorer(log_traffic_fines, coverage=1)
Instead of setting coverage, you can set an exact number of traces with n_traces. It should be an integer larger than 0.
trace_explorer(log_traffic_fines, n_traces = 10)
library(TraMineR)
## Warning: package 'TraMineR' was built under R version 4.1.2
##
## TraMineR stable version 2.2-4 (Built: 2022-06-10)
## Website: http://traminer.unige.ch
## Please type 'citation("TraMineR")' for citation information.
Plot selected cases.
seqplot(s_tf,
type="i",
cex.axis=.8,
cex.legend=.8,
legend.prop = .44,
with.legend = "right",
)
Other types :
seqplot(s_tf,
type="dH",
cex.axis=.8,
cex.legend=.8,legend.prop = .44, with.legend = "right",
)
seqplot(s_tf,
type="Ht",
cex.axis=.8,
cex.legend=.8,
legend.prop = .44,
with.legend = "right",
)
seqplot(s_tf,
type="pc",
cex.axis=.8,
cex.legend=.8,
legend.prop = .44,
with.legend = "right",
)
To get the state frequencies, use the seqstatd
function.
seqstatd(s_tf, weighted=FALSE)
## [State frequencies]
## [1] [2] [3] [4] [5] [6]
## Add penalty 0 0.00 0.00000 0.95000 0.04466 0.0248
## Appeal to Judge 0 0.00 0.00000 0.00302 0.00000 0.0000
## Create Fine 1 0.00 0.00000 0.00000 0.00000 0.0000
## Insert Date Appeal to Prefecture 0 0.00 0.00043 0.04052 0.00906 0.0000
## Insert Fine Notification 0 0.00 0.98804 0.00194 0.00000 0.0000
## Notify Result Appeal to Offender 0 0.00 0.00000 0.00000 0.00022 0.0033
## Payment 0 0.34 0.00876 0.00431 0.23581 0.4636
## Receive Result Appeal from Prefecture 0 0.00 0.00000 0.00000 0.00043 0.0199
## Send Appeal to Prefecture 0 0.00 0.00000 0.00022 0.00367 0.3361
## Send Fine 0 0.66 0.00278 0.00000 0.00000 0.0000
## Send for Credit Collection 0 0.00 0.00000 0.00000 0.70615 0.1523
## [7] [8] [9]
## Add penalty 0.032 0.058 0.000
## Appeal to Judge 0.000 0.019 0.082
## Create Fine 0.000 0.000 0.000
## Insert Date Appeal to Prefecture 0.000 0.000 0.000
## Insert Fine Notification 0.000 0.000 0.000
## Notify Result Appeal to Offender 0.145 0.808 0.000
## Payment 0.097 0.058 0.490
## Receive Result Appeal from Prefecture 0.661 0.000 0.000
## Send Appeal to Prefecture 0.048 0.000 0.061
## Send Fine 0.000 0.000 0.000
## Send for Credit Collection 0.016 0.058 0.367
##
## [Valid states]
## [1] [2] [3] [4] [5] [6] [7] [8]
## N 10000 10000 4682 4640 4635 604 62 52
## [9]
## N 49
##
## [Entropy index]
## [1] [2] [3] [4] [5] [6] [7] [8]
## H 0 0.27 0.03 0.097 0.33 0.5 0.46 0.31
## [9]
## H 0.46
sepsis | hospital_billing | hospital | patients
library(heuristicsmineR)
## Warning: package 'heuristicsmineR' was built under R version 4.1.2
##
## Attaching package: 'heuristicsmineR'
## The following object is masked from 'package:processmapR':
##
## precedence_matrix
process_map(L_heur_1)
You can easily filter the data to map using the %>%
operator.
pm<-L_heur_1 %>%
filter_activity_frequency(percentage=1) %>%
filter_trace_frequency(percentage=1) %>%
process_map(sec_nodes=frequency(value="absolute"),
type=performance(FUN=mean,
units="hours",
color_scale="Greens",
color_edges="Greens"),
unit="hours")
pm
pm<-L_heur_1 %>%
filter_activity_frequency(percentage=.5) %>%
filter_trace_frequency(percentage=.5) %>%
process_map(sec_nodes=frequency(value="absolute"),
type=performance(FUN=mean,
units="hours",
color_scale="Greens",
color_edges="Greens"),
unit="hours")
pm
sepsis | hospital_billing | hospital | patients
Full dataset
process_map(log_traffic_fines, rankdir = "TB")
Filtering to get a 90% coverage of activity instances.
log_traffic_fines %>%
filter_trace_frequency(percentage=.9) %>%
process_map(rankdir = "TB")
sepsis | hospital_billing | hospital | patients
library("viridisLite")
m<-log_sepsis %>%
dependency_matrix(threshold=0) %>%
(reshape2::melt)
m[m$value!=0,] %>%
ggplot(aes(consequent, antecedent, label=round(value,2)))+
geom_tile(aes(fill=value), color="grey27")+
geom_text(color="white",size=2.5)+
theme_light()+
theme(axis.text.x=element_text(angle=45,
hjust=1,
size=8),
axis.text.y=element_text(size=8))
library("viridisLite")
m<-log_sepsis %>%
dependency_matrix(threshold=.9) %>%
(reshape2::melt)
m[m$value!=0,] %>%
ggplot(aes(consequent, antecedent, label=round(value,2)))+
geom_tile(aes(fill=value), color="grey27")+
geom_text(color="white",size=2.5)+
theme_light()+
theme(axis.text.x=element_text(angle=45,
hjust=1,
size=8),
axis.text.y=element_text(size=8))
To compute the process map, first:
sepsis
## # A tibble: 15,214 × 34
## case_id activity lifecycle resource timestamp age crp diagnose
## <chr> <fct> <fct> <fct> <dttm> <int> <dbl> <chr>
## 1 A ER Regis… complete A 2014-10-22 11:15:41 85 NA A
## 2 A Leucocyt… complete B 2014-10-22 11:27:00 NA NA <NA>
## 3 A CRP complete B 2014-10-22 11:27:00 NA 210 <NA>
## 4 A LacticAc… complete B 2014-10-22 11:27:00 NA NA <NA>
## 5 A ER Triage complete C 2014-10-22 11:33:37 NA NA <NA>
## 6 A ER Sepsi… complete A 2014-10-22 11:34:00 NA NA <NA>
## 7 A IV Liquid complete A 2014-10-22 14:03:47 NA NA <NA>
## 8 A IV Antib… complete A 2014-10-22 14:03:47 NA NA <NA>
## 9 A Admissio… complete D 2014-10-22 14:13:19 NA NA <NA>
## 10 A CRP complete B 2014-10-24 09:00:00 NA 1090 <NA>
## # … with 15,204 more rows, and 26 more variables: diagnosticartastrup <chr>,
## # diagnosticblood <chr>, diagnosticecg <chr>, diagnosticic <chr>,
## # diagnosticlacticacid <chr>, diagnosticliquor <chr>, diagnosticother <chr>,
## # diagnosticsputum <chr>, diagnosticurinaryculture <chr>,
## # diagnosticurinarysediment <chr>, diagnosticxthorax <chr>, disfuncorg <chr>,
## # hypotensie <chr>, hypoxie <chr>, infectionsuspected <chr>, infusion <chr>,
## # lacticacid <chr>, leucocytes <chr>, oligurie <chr>, …
log_sepsis <- simple_eventlog(sepsis)
process_map(log_sepsis, rankdir = "TB")
This process map is quite difficult to read; we need to mine the process to get a simpler model.
To compute the causal net, first:
sepsis
## # A tibble: 15,214 × 34
## case_id activity lifecycle resource timestamp age crp diagnose
## <chr> <fct> <fct> <fct> <dttm> <int> <dbl> <chr>
## 1 A ER Regis… complete A 2014-10-22 11:15:41 85 NA A
## 2 A Leucocyt… complete B 2014-10-22 11:27:00 NA NA <NA>
## 3 A CRP complete B 2014-10-22 11:27:00 NA 210 <NA>
## 4 A LacticAc… complete B 2014-10-22 11:27:00 NA NA <NA>
## 5 A ER Triage complete C 2014-10-22 11:33:37 NA NA <NA>
## 6 A ER Sepsi… complete A 2014-10-22 11:34:00 NA NA <NA>
## 7 A IV Liquid complete A 2014-10-22 14:03:47 NA NA <NA>
## 8 A IV Antib… complete A 2014-10-22 14:03:47 NA NA <NA>
## 9 A Admissio… complete D 2014-10-22 14:13:19 NA NA <NA>
## 10 A CRP complete B 2014-10-24 09:00:00 NA 1090 <NA>
## # … with 15,204 more rows, and 26 more variables: diagnosticartastrup <chr>,
## # diagnosticblood <chr>, diagnosticecg <chr>, diagnosticic <chr>,
## # diagnosticlacticacid <chr>, diagnosticliquor <chr>, diagnosticother <chr>,
## # diagnosticsputum <chr>, diagnosticurinaryculture <chr>,
## # diagnosticurinarysediment <chr>, diagnosticxthorax <chr>, disfuncorg <chr>,
## # hypotensie <chr>, hypoxie <chr>, infectionsuspected <chr>, infusion <chr>,
## # lacticacid <chr>, leucocytes <chr>, oligurie <chr>, …
log_sepsis <- simple_eventlog(sepsis)
library(heuristicsmineR)
plot(dependency_matrix(log_sepsis))+geom_raster(aes(fill = dep))+geom_text(aes(label=round(dep, 2)),color="white",size=2.5)
The dependency_type option allows you to tune the
computation of the dependency matrix. For instance,
dependency_type=dependency_type_fhm(...) where
... stands for:
dependency_type_fhm(
threshold_dependency = 0.9,
threshold_l1 = threshold_dependency,
threshold_l2 = threshold_dependency,
threshold_frequency = 0,
all_connected = FALSE,
endpoints_connected = FALSE
)
with
threshold_dependency A dependency threshold, usually
in the interval [0,1], filtering out dependencies below the
threshold.
threshold_l1 A dependency threshold, usually in the
interval [0,1], filtering out self-loop dependencies below the
threshold.
threshold_l2 A dependency threshold, usually in the
interval [0,1], filtering out length-two loop dependencies below the
threshold.
threshold_frequency An absolute frequency threshold
filtering dependencies which are observed infrequently.
all_connected If TRUE the best antecedent and
consequent (as determined by the dependency measure) are going to be
added regardless of the threshold value.
endpoints_connected If TRUE the start/end activity
is added as antecedent/consequent when an activity would not be
connected according to the threshold value.
library("viridisLite")
m<-log_sepsis %>%
dependency_matrix(threshold=.8) %>%
(reshape2::melt)
m[m$value!=0,] %>%
ggplot(aes(consequent, antecedent, label=round(value,2)))+
geom_tile(aes(fill=value), color="grey27")+
scale_fill_gradientn(colors=viridis(256),
name="Dependency\nmeasure")+
theme_light()+
theme(axis.text.x=element_text(angle=45,
hjust=1,
size=8),
axis.text.y=element_text(size=8))
cn_sep0<-causal_net(log_sepsis)
## Warning in check_dependencies(dependencies): Activities [Release E] have neither
## an antecedent or consequent in the supplied dependency matrix.Consider using
## the `all_connected` or `endpoints_connected` parameter when generating the
## dependency matrix.
cn_sep0
## Nodes
## # A tibble: 18 × 12
## act from_id n n_distinct_cases bindings_input bindings_output label
## <chr> <int> <dbl> <dbl> <list> <list> <chr>
## 1 Admissio… 1 117 110 <list [1]> <list [0]> "Adm…
## 2 Admissio… 2 1182 800 <list [3]> <list [4]> "Adm…
## 3 CRP 3 3262 1007 <list [3]> <list [7]> "CRP…
## 4 End 4 1050 1050 <list [51]> <list [0]> "End"
## 5 ER Regis… 5 1050 1050 <list [1]> <list [1]> "ER …
## 6 ER Sepsi… 6 1049 1049 <list [1]> <list [3]> "ER …
## 7 ER Triage 7 1053 1050 <list [1]> <list [1]> "ER …
## 8 IV Antib… 8 823 823 <list [1]> <list [4]> "IV …
## 9 IV Liquid 9 753 753 <list [3]> <list [1]> "IV …
## 10 LacticAc… 10 1466 860 <list [1]> <list [2]> "Lac…
## 11 Leucocyt… 11 3383 1012 <list [3]> <list [6]> "Leu…
## 12 Release A 12 671 671 <list [5]> <list [2]> "Rel…
## 13 Release B 13 56 56 <list [1]> <list [1]> "Rel…
## 14 Release C 14 25 25 <list [1]> <list [1]> "Rel…
## 15 Release D 15 24 24 <list [1]> <list [2]> "Rel…
## 16 Release E 16 6 6 <list [0]> <list [0]> "Rel…
## 17 Return ER 17 294 294 <list [2]> <list [1]> "Ret…
## 18 Start 18 1050 1050 <list [0]> <list [5]> "Sta…
## # … with 5 more variables: color_level <dbl>, shape <chr>, fontcolor <chr>,
## # color <chr>, tooltip <chr>
## Edges
## # A tibble: 36 × 8
## antecedent consequent dep from_id to_id n label penwidth
## <chr> <chr> <dbl> <int> <int> <dbl> <chr> <dbl>
## 1 IV Antibiotics Admission IC 0.979 8 1 98 98 1.17
## 2 Admission NC Admission NC 0.994 2 2 381 381 1.64
## 3 IV Antibiotics Admission NC 0.990 8 2 690 690 2.17
## 4 CRP CRP 0.997 3 3 2251 2251 4.81
## 5 Start CRP 0.909 18 3 1006 1006 2.70
## 6 Admission NC End 0.933 2 4 129 129 1.22
## 7 CRP End 0.976 3 4 241 241 1.41
## 8 ER Sepsis Triage End 0.98 6 4 226 226 1.38
## 9 IV Antibiotics End 0.989 8 4 124 124 1.21
## 10 IV Liquid End 0.923 9 4 752 752 2.27
## # … with 26 more rows
Consider using the all_connected or
endpoints_connected parameter when generating the
dependency matrix.
cn_sep<-causal_net(log_sepsis, all_connected = TRUE)
cn_sep
## Nodes
## # A tibble: 18 × 12
## act from_id n n_distinct_cases bindings_input bindings_output label
## <chr> <int> <dbl> <dbl> <list> <list> <chr>
## 1 Admissio… 1 117 110 <list [1]> <list [1]> "Adm…
## 2 Admissio… 2 1182 800 <list [3]> <list [4]> "Adm…
## 3 CRP 3 3262 1007 <list [3]> <list [8]> "CRP…
## 4 End 4 1050 1050 <list [54]> <list [0]> "End"
## 5 ER Regis… 5 1050 1050 <list [1]> <list [1]> "ER …
## 6 ER Sepsi… 6 1049 1049 <list [1]> <list [3]> "ER …
## 7 ER Triage 7 1053 1050 <list [1]> <list [1]> "ER …
## 8 IV Antib… 8 823 823 <list [1]> <list [4]> "IV …
## 9 IV Liquid 9 753 753 <list [3]> <list [1]> "IV …
## 10 LacticAc… 10 1466 860 <list [3]> <list [2]> "Lac…
## 11 Leucocyt… 11 3383 1012 <list [3]> <list [6]> "Leu…
## 12 Release A 12 671 671 <list [5]> <list [2]> "Rel…
## 13 Release B 13 56 56 <list [1]> <list [1]> "Rel…
## 14 Release C 14 25 25 <list [1]> <list [1]> "Rel…
## 15 Release D 15 24 24 <list [1]> <list [2]> "Rel…
## 16 Release E 16 6 6 <list [1]> <list [1]> "Rel…
## 17 Return ER 17 294 294 <list [2]> <list [1]> "Ret…
## 18 Start 18 1050 1050 <list [0]> <list [5]> "Sta…
## # … with 5 more variables: color_level <dbl>, shape <chr>, fontcolor <chr>,
## # color <chr>, tooltip <chr>
## Edges
## # A tibble: 39 × 8
## antecedent consequent dep from_id to_id n label penwidth
## <chr> <chr> <dbl> <int> <int> <dbl> <chr> <dbl>
## 1 IV Antibiotics Admission IC 0.979 8 1 98 98 1.17
## 2 Admission NC Admission NC 0.994 2 2 381 381 1.64
## 3 IV Antibiotics Admission NC 0.990 8 2 690 690 2.17
## 4 CRP CRP 0.997 3 3 2251 2251 4.81
## 5 Start CRP 0.909 18 3 1006 1006 2.70
## 6 Admission NC End 0.933 2 4 129 129 1.22
## 7 CRP End 0.976 3 4 235 235 1.40
## 8 ER Sepsis Triage End 0.98 6 4 226 226 1.38
## 9 IV Antibiotics End 0.989 8 4 124 124 1.21
## 10 IV Liquid End 0.923 9 4 752 752 2.27
## # … with 29 more rows
traffic_fines | hospital_billing | hospital | patients
pn1<-(as.petrinet(cn_sep))
unique(pn1$flows$to[!pn1$flows$to %in% pn1$flows$from])
## [1] "p_in_4"
render_PN(pn1)
Place/Transition Connecting Degree (PTCD)
(n_flows(pn1)/n_places(pn1)+n_flows(pn1)/n_transitions(pn1))/2
## [1] 4.664809
traffic_fines | hospital_billing | hospital | patients
library(processanimateR)
## Warning: package 'processanimateR' was built under R version 4.1.2
animate_process(log_traffic_fines)
A more intricate example, apparently adapted from the processanimateR documentation (the original link was lost):
library(processanimateR)
library(dplyr)
library(bupaR)
# Extract only the lacticacid measurements
lactic <- log_sepsis %>%
mutate(lacticacid = as.numeric(lacticacid)) %>%
filter_activity(c("LacticAcid")) %>%
as.data.frame() %>%
select("case" = case_id,
"time" = timestamp,
value = lacticacid) # format needs to be 'case,time,value'
# Remove the measurement events from the sepsis log
sepsisBase <- log_sepsis %>%
filter_activity(c("LacticAcid", "CRP", "Leucocytes", "Return ER",
"IV Liquid", "IV Antibiotics"), reverse = T) %>%
filter_trace_frequency(percentage = 0.95)
# Animate with the secondary data frame `lactic`
animate_process(sepsisBase,
mode = "relative",
duration = 300,
legend = "color",
mapping = token_aes(color = token_scale(lactic,
scale = "linear",
range = c("#fff5eb","#7f2704"))))
traffic_fines | hospital_billing | hospital | patients